import asyncio
import random
from concurrent.futures import ThreadPoolExecutor

from utils.base import Spider
import threading
import time
from utils.base import FakChromeUA
import requests
# def test(url,headers):
#     res = requests.get(url,headers)
#     print(res)
#     return res
# if __name__ == '__main__':
#     start = time.time()
#     url = 'http://www.baidu.com'
#
#     # for i in range(10):
#     headers = Spider.get_ua()
#     print(headers)
#     for i in range(10):
#         test(url,headers)
#         print('单线程')
# end = time.time()
# print(end-start)
# #
# def tests(url,i):
#     res = requests.get(url)
#     return res
# if __name__ == '__main__':
#     start = time.time()
#     url = 'http://www.baidu.com'
#     t = []
#     for i in range(10):
#         thread = threading.Thread(target = tests,args = (url,i))
#         t.append(thread)
#         thread.start()
#     for i in t:
#         i.join()
#     res = time.time()
#     print(res-start,'多线程')
# from utils.base import FakChromeUA
# def tests(url,headers):
#     res = requests.get(url)
#     return res
# if __name__ == '__main__':
#     start = time.time()
#     url = 'http://www.baidu.com'
#     headers = FakChromeUA.get_ua()
#     t = []
#     for i in range(20):
#         thread = threading.Thread(target = tests,args = (url,headers))
#         t.append(thread)
#         thread.start()
#     for i in t:
#         i.join()
#     res = time.time()
#     print(res-start)

# def tetst():
#     res = requests.get(url,i)

# def crow(url,headers):
#     res = requests.get(url,headers)
#
# t = []
# start = time.time()
# for i in range(10):
#     url = 'http://www.baidu.com'
#     headers = FakChromeUA.get_ua()
#     thead = threading.Thread(target = crow,args = (url,headers))
#     thead.start()
#     t.append(thead)
# for i in t:
#     i.join()
# print(time.time()-start)

# def crow(url,headers):
#     res = requests.get(url,headers)
#     return res
# if __name__ == '__main__':
#     t = []
#     url = 'http://www.baidu.com'
#     headers = FakChromeUA.get_ua()
#     for i in range(10):
#         thread = threading.Thread(target=crow,args=(url,headers))
#         end = time.time()
#         thread.start()
#         t.append(thread)
#     for i in t:
#         i.join()
#     print(time.time()-end)

# def crow(url,headers):
#     res = requests.get(url,headers)
#     return res
# if __name__ == '__main__':
#     url = 'http://www.baidu.com'
#     headers = FakChromeUA.get_ua()
#     t = []
#     start_time = time.time()
#     for i in range(10):
#         thread = threading.Thread(target=crow,args = (url,headers))
#         thread.start()
#         t.append(thread)
#     for i in t:
#         i.join()
#     print(time.time()-start_time)

# def crow(url,headers):
#     res = requests.get(url,headers)
#     return res
# if __name__ == '__main__':
#     url = 'http://www.baidu.com'
#     headers = FakChromeUA.get_ua()
#     t = []
#     start_time = time.time()
#     for i in range(20):
#         thread = threading.Thread(target=crow,args = (url,headers))
#         thread.start()
#         t.append(thread)
#     for i in t:
#         i.join()
#     print(time.time()-start_time)
#
# def crow(url):
#     s = Spider()
#     res = requests.get(url = url)
#     # print(f'1--->{s.headers}')
#     return res
# if __name__ == '__main__':
#     url = 'http://www.baidu.com'
#     headers = FakChromeUA.get_ua()
#     # print(f'2--->{headers}')
#     # print(headers)
#     t = []
#     start_time = time.time()
#     for i in range(10):
#         thread = threading.Thread(target = crow,args = (url,))
#         thread.start()
#         t.append(thread)
#     for i in t:
#         i.join()
#     print(time.time()-start_time)

# def crawls(url):
#     res = requests.get(url)
#     return res
# if __name__ == '__main__':
#     start = time.time()
#     base_url = 'https://pic.netbian.com/4kmenv/index_{}.html'
#     for i in range(2,50):
#         crawls(base_url.format(i))
#     end = time.time() - start
#     print(end,'单线程')



# def crawl(url):
#     s = Spider()
#     res = s.fetch(url = url)
#     return res
# if __name__ == '__main__':
#     start = time.time()
#     base_url = 'https://pic.netbian.com/4kmenv/index_{}.html'
#     t = []
#     for i in range(2, 100):
#         thread = threading.Thread(target = crawl,args = [base_url.format(i)])
#         t.append(thread)
#         thread.start()
#     for i in t:
#         i.join()
#     end = time.time() - start
#     print(end, '多线程')


# #  线程池
# from concurrent.futures import ThreadPoolExecutor
# def crawl1(url,headers):
#     s = Spider()
#     res = requests.get(url,headers)
#     print(res)
#     return res
# if __name__ == '__main__':
#     start = time.time()
#     base_url = 'http://www.baidu.com'
#     headers = Spider.get_ua()
#     with ThreadPoolExecutor(10) as f:
#         for i in range(2,16):
#             time.sleep(random.randint(300,500)/1000)
#             f.submit(crawl1,url=base_url.format(i),headers= headers)
#             print(base_url.format(i))
#     end = time.time() - start
#     print(end, '线程池')
#     #  0.7725784778594971

# # 线程池
# from concurrent.futures import ThreadPoolExecutor
# def crawl2(url):
#     res = requests.get(url)
#     print(res)
#     return res
# if __name__ == '__main__':
#     url = 'https://pic.netbian.com/4kmeinv/index_{}.html'
#     t = []
#     start = time.time()
#     with ThreadPoolExecutor(50) as f:
#         for i in range(2,100):
#             thread = f.submit(crawl2,url.format(i))
#             t.append(thread)
#     print(time.time() - start)

# =======================================================

import os
from concurrent.futures import ThreadPoolExecutor

# Request headers for downloading video segments (.ts) from ke.qq.com.
# NOTE(review): the 'cookie' value is a hard-coded, account-specific session
# cookie — it will expire and should not be committed to version control.
# Move it to a config file / environment variable before sharing this script.
headers = {'accept':'*/*',
'accept-encoding':'gzip,deflate, br',
'accept-language':'zh-CN,zh;q=0.9',
'cookie':'__guid=204504458.2408025195654280000.1622888663683.991; pgv_pvid=9673648344; ts_uid=2009483360; pac_uid=0_a2624f2d63dfb; iip=0; RK=BQxAmswvaT; ptcz=4d2172e137c93a4859459c0585201a228f32ae71fa1829986e4be04a684d44b8; uid_uin=144115200500678371; uid_type=2; uid_a2=5a20da6428ad391e12b12afb2e468d8c59f996ecd57b6871b2f26ab7cc2c8abfff8f4b2f945feab8385524d8c587760b02e8e6db63de2185e7f28ff4ffa2ee548b247cb901780027; auth_version=2.0; uin=144115200500678371; p_uin=144115200500678371; p_luin=144115200500678371; uid_origin_uid_type=2; ke_login_type=2; localInterest=[2001,2007,2056]; _pathcode=0.7639668724592987; tdw_auin_data=-; tdw_data={"ver4":"4","ver6":"","refer":"","from_channel":"","path":"B-0.7639668724592987","auin":"-","uin":"144115200500678371","real_uin":"144115200500678371"}; tdw_first_visited=1; tdw_data_testid=; tdw_data_flowid=; monitor_count=75; sessionPath=162908362286746337864096; pgv_info=ssid=s6600749714; ts_last=ke.qq.com/webcourse/3582874/103725756; Hm_lvt_0c196c536f609d373a16d246a117fd44=1628952105,1629002740,1629028769,1629083623; Hm_lpvt_0c196c536f609d373a16d246a117fd44=1629083623; tdw_data_new_2={"auin":"-","sourcetype":"","sourcefrom":"","ver9":"144115200500678371","uin":"144115200500678371","visitor_id":"9747383308756332","sessionPath":"162908362286746337864096',
'referer':'https://ke.qq.com/webcourse/3582874/103725756',
'sec-fetch-dest':'empty',
'sec-fetch-mode':'cors',
'sec-fetch-site':'same-origin',
'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}

from icecream import ic

# Root working directory: holds url.txt (segment URL list, one per line) and
# the ts\ / mp4\ output subdirectories. Windows-specific raw-string path.
base_path = r'G:\爬虫第四章\scrapy数据提取'
def crawl2(url, count):
    """Download one video segment and save it as <base_path>\\ts\\<count>.ts.

    Args:
        url: segment URL; may carry a trailing newline (read from url.txt).
        count: sequence number used as the output filename stem.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # makedirs(exist_ok=True) replaces the exists()+mkdir() pair, which is
    # race-prone when several pool threads enter here at the same time
    # (two threads can both see "missing" and the second mkdir raises).
    os.makedirs(fr'{base_path}\ts', exist_ok=True)
    # Fetch BEFORE opening the output file so a failed request no longer
    # leaves an empty/truncated .ts segment on disk.
    res = requests.get(url.strip(), headers=headers)
    res.raise_for_status()  # don't silently save an HTML error page as a segment
    ic(f'res---------->{res}')
    # The with-statement closes the file; the original's explicit close() was redundant.
    with open(fr'{base_path}\ts\{count}.ts', 'wb') as ts:
        ts.write(res.content)
    ic(f'{count}---->done')
def run(url, count):
    """Thread-pool entry point: delegate the download of segment *count* to crawl2."""
    return crawl2(url, count)

if __name__ == '__main__':
    # Output directory for the final merged video (merge step not shown here).
    os.makedirs(fr'{base_path}\mp4', exist_ok=True)
    with open(fr'{base_path}\url.txt', 'r') as f:
        stat = time.time()

        # Thread pool, 10 workers — ~107 s for the full segment list.
        futures = []
        with ThreadPoolExecutor(10) as pool:
            # enumerate replaces the hand-rolled counter; the line index
            # doubles as the segment's output filename.
            for count, url in enumerate(f):
                print(url.strip())
                futures.append(pool.submit(run, url, count))
                ic(count + 1)  # original logged the counter after incrementing
        # Executor.submit swallows worker exceptions until Future.result()
        # is called — check every future so failed downloads are not silent.
        for fut in futures:
            try:
                fut.result()
            except Exception as exc:
                ic(f'download failed: {exc!r}')
        ic(time.time() - stat)

        # Multithreaded (one thread per URL): with too many threads the server drops the connection
        # list = []
        # for url in f.readlines():
        #     print(url.strip())
        #     thread = threading.Thread(target = crawl2,args = (url,count,))
        #     thread.start()
        #     count +=1
        #
        #     list.append(thread)
        # for j in list:
        #     j.join()

#  Single-threaded baseline (for timing comparison)
#         for url in f.readlines():
#             crawl2(url,count)
#             count+=1
#         print(time.time() - stat)

##==========================================================

# Coroutine (asyncio + aiohttp) variant
# import os
# import threading
# from utils.base import Spider
# import aiohttp,asyncio,aiofiles
#
# async def crawls(url,count):
#     '''下载任务'''
#     headers = {
#         'user-agent':Spider.get_ua()
#     }
#     async with aiohttp.ClientSession() as session:
#         async with session.get(url.strip(),headers=headers) as res:
#             async with aiofiles.open(fr'G:\爬虫第二章\1selelium\ts\{count}.ts','wb') as f:
#                 await f.write(await res.content.read())
#                 print(f'{count}>>>>>>done')
#
#
# async  def run(url,count):
#     # with open(r'G:\爬虫第二章\1selelium\url.txt')as url_list:
#     #     count = 0
#         tasks = []
#         # for url in url_list:
#         #     print(str(url))
#         # await crawls(url.strip(),count)
#         tasks.append(crawls(url.strip(),count))
#         await asyncio.wait(tasks)
# async def run1(url,count):
#     loop = await asyncio.get_event_loop()
#     await loop.run_until_complete(run(url,count))
# if __name__ == '__main__':
#     st = time.time()
#     with open(r'G:\爬虫第二章\1selelium\url.txt')as url_list:
#         count = 0
#         tasks = []
#         with ThreadPoolExecutor(50) as f8:
#             for url in url_list:
#                 print(url)
#                 f8.submit(run1,url,count)
#                 count+=1
#
#     print(time.time()-st)
